library(tidyverse)
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31
## 4 audi a4 2.0 2008 4 auto(av) f 21 30
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26
## 7 audi a4 3.1 2008 6 auto(av) f 18 27
## 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26
## 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25
## 10 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28
## # ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
We want to see the relationship between a car’s engine size (displ) and it’s fuel efficiency on the highway (hwy). A car with a low fuel efficiency consumes more fuel than a car with a high fuel efficiency when they travel the same distance.
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy))
In this plot, we can see a downward trend that explains a negative relationship between the two variables, so our hypothesis is that cars with bigger engines are less fuel efficient.
ggplot(data = mpg)
nrow(mtcars)
## [1] 32
ncol(mtcars)
## [1] 11
ggplot(data = mpg) + geom_point(mapping = aes(x = cyl, y =hwy))
ggplot(data = mpg) + geom_point(mapping = aes(x = class, y =drv))
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, color = class))
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, size = class))
## Warning: Using size for a discrete variable is not advised.
## alpha controls the transparency of the points
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, alpha = class))
ggplot(data = mpg) + geom_point(mapping = aes(x = displ, y = hwy, shape = class))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy), color = "blue")
summary(mpg)
## manufacturer model displ year
## Length:234 Length:234 Min. :1.600 Min. :1999
## Class :character Class :character 1st Qu.:2.400 1st Qu.:1999
## Mode :character Mode :character Median :3.300 Median :2004
## Mean :3.472 Mean :2004
## 3rd Qu.:4.600 3rd Qu.:2008
## Max. :7.000 Max. :2008
## cyl trans drv cty
## Min. :4.000 Length:234 Length:234 Min. : 9.00
## 1st Qu.:4.000 Class :character Class :character 1st Qu.:14.00
## Median :6.000 Mode :character Mode :character Median :17.00
## Mean :5.889 Mean :16.86
## 3rd Qu.:8.000 3rd Qu.:19.00
## Max. :8.000 Max. :35.00
## hwy fl class
## Min. :12.00 Length:234 Length:234
## 1st Qu.:18.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :23.44
## 3rd Qu.:27.00
## Max. :44.00
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, color = cyl))
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, size = cyl))
## shape cannot be applied to continuos variable
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, size = cyl, color = cyl))
# For shapes that have a border (like 21), you can colour the inside and
# outside separately. Use the stroke aesthetic to modify the width of the
# border
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, shape = class, stroke = 3))
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have 7.
## Consider specifying shapes manually if you must have them.
## Warning: Removed 62 rows containing missing values (geom_point).
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, colour = displ < 5))
Facets are particularly useful for categorical variables. Split your plot into facets, subplots that each display one subset of the data. To facet your plot by a single variable, use facet_wrap().
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_wrap(~ class, nrow = 2)
To facet your plot on the combination of two variables, add facet_grid() to your plot call.
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(drv ~ cyl)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(drv ~ class)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(drv ~ .)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_grid(. ~ cyl)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
facet_wrap(~ class, nrow = 2)
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy, color = class))
A geom is the geometrical object that a plot uses to represent data. People often describe plots by the type of geom that the plot uses. For example, bar charts use bar geoms, line charts use line geoms, boxplots use boxplot geoms, and so on. Scatterplots break the trend; they use the point geom. As we see above, you can use different geoms to plot the same data.
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy, linetype = drv))
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg) +
geom_smooth(mapping = aes(x = displ, y = hwy, group = drv))
## `geom_smooth()` using method = 'loess'
To display multiple geoms in the same plot, add multiple geom functions to ggplot():
ggplot(data = mpg) +
geom_point(mapping = aes(x = displ, y = hwy)) +
geom_smooth(mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess'
This is a cleaner way of doing the above with less duplication:
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(mapping = aes(color = class)) +
geom_smooth()
## `geom_smooth()` using method = 'loess'
## line chart - two variables: continuous x, continuous x
ggplot(data = mpg) + geom_line(mapping = aes(x = displ, y = hwy))
## box plot - two variables: discrete x, continuous x
ggplot(data = mpg) + geom_boxplot(mapping = aes(x = class, y = hwy))
## histogram - one variable: continuous
ggplot(data = mpg) + geom_histogram(mapping = aes(x = hwy), bins = 20)
## area chart - one variable: continuous
ggplot(data = mpg) + geom_area(stat = "bin", mapping = aes(x = hwy), bins = 20)
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
geom_point(show.legend = FALSE) +
geom_smooth(se = FALSE, show.legend = FALSE)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
geom_point(show.legend = FALSE) +
geom_smooth(se = FALSE, show.legend = FALSE)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point() +
geom_smooth()
## `geom_smooth()` using method = 'loess'
ggplot() +
geom_point(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_smooth(data = mpg, mapping = aes(x = displ, y = hwy))
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(size = 3) +
geom_smooth(se = FALSE, size = 1.5)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, group = drv)) +
geom_point(size = 3) +
geom_smooth(se = FALSE, size = 1.5)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, color = drv)) +
geom_point(size = 3) +
geom_smooth(se = FALSE, size = 1.5)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(size = 3, mapping = aes(color = drv)) +
geom_smooth(se = FALSE, size = 1.5)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(size = 3, mapping = aes(color = drv)) +
geom_smooth(se = FALSE, size = 1.5, mapping = aes(group = drv),
linetype = "dashed", show.legend = TRUE)
## `geom_smooth()` using method = 'loess'
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
geom_point(size = 3, shape = 21, stroke = 1.2, mapping = aes(fill = drv), colour = "white")